Addfusion

带参数alpha的逐元素求和

\[out_i = in0_i + in1_i \times alpha\]
输入:
  • in0 - 第一个输入数据地址。

  • in1 - 第二个输入数据地址。

  • alpha - 乘法因子。

  • size - 计算长度。

  • core_mask - 核掩码(仅共享存储版本需要)。

输出:
  • out - 计算结果地址。

支持平台:

FT78NE MT7004

备注

  • FT78NE 支持int8, int16, int32, fp32, fp64, cplx64, cplx128

  • MT7004 支持fp16, fp32, int16, int32, cplx64

共享存储版本:

void i8_addext_s(int8_t *in0, int8_t *in1, int8_t alpha, int8_t *out, int size, int core_mask)
void i16_addext_s(int16_t *in0, int16_t *in1, int16_t alpha, int16_t *out, int size, int core_mask)
void i32_addext_s(int32_t *in0, int32_t *in1, int32_t alpha, int32_t *out, int size, int core_mask)
void hp_addext_s(half *in0, half *in1, half alpha, half *out, int size, int core_mask)
void fp_addext_s(float *in0, float *in1, float alpha, float *out, int size, int core_mask)
void dp_addext_s(double *in0, double *in1, double alpha, double *out, int size, int core_mask)
void c64_addext_s(float *in0, float *in1, float alpha, float *out, int size, int core_mask)
void c128_addext_s(double *in0, double *in1, double alpha, double *out, int size, int core_mask)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <addfusion.h>
 4
 5int main(int argc, char* argv[]) {
 6    float *in0 = (float *)0xA0000000;   //input在DDR空间
 7    float *in1 = (float *)0xB0000000;
 8    float *out = (float *)0xC0000000;
 9    float alpha = 0.1;
10    int size = 1000;
11    int core_mask = 0xff;
12    fp_addext_s(in0, in1, alpha, out, size, core_mask);
13    return 0;
14}

私有存储版本:

void i8_addext_p(int8_t *in0, int8_t *in1, int8_t alpha, int8_t *out, int size)
void i16_addext_p(int16_t *in0, int16_t *in1, int16_t alpha, int16_t *out, int size)
void i32_addext_p(int32_t *in0, int32_t *in1, int32_t alpha, int32_t *out, int size)
void hp_addext_p(half *in0, half *in1, half alpha, half *out, int size)
void fp_addext_p(float *in0, float *in1, float alpha, float *out, int size)
void dp_addext_p(double *in0, double *in1, double alpha, double *out, int size)
void c64_addext_p(float *in0, float *in1, float alpha, float *out, int size)
void c128_addext_p(double *in0, double *in1, double alpha, double *out, int size)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <addfusion.h>
 4
 5int main(int argc, char* argv[]) {
 6    float *in0 = (float *)0x10000000;   //input在L2空间
 7    float *in1 = (float *)0x10001000;
 8    float *out = (float *)0xC0000000;
 9    float alpha = 0.1;
10    int size = 1000;
11    fp_addext_p(in0, in1, alpha, out, size);
12    return 0;
13}

逐元素求和并计算relu激活函数

\[out_i = max(0, in0_i + in1_i)\]
输入:
  • in0 - 第一个输入数据地址。

  • in1 - 第二个输入数据地址。

  • size - 计算长度。

  • core_mask - 核掩码(仅共享存储版本需要)。

输出:
  • out - 计算结果地址。

支持平台:

FT78NE MT7004

备注

  • FT78NE 支持int8, int16, int32, fp32, fp64, cplx64, cplx128

  • MT7004 支持fp16, fp32, int16, int32, cplx64

共享存储版本:

void i8_addrelu_s(int8_t *in0, int8_t *in1, int8_t *out, int size, int core_mask)
void i16_addrelu_s(int16_t *in0, int16_t *in1, int16_t *out, int size, int core_mask)
void i32_addrelu_s(int32_t *in0, int32_t *in1, int32_t *out, int size, int core_mask)
void hp_addrelu_s(half *in0, half *in1, half *out, int size, int core_mask)
void fp_addrelu_s(float *in0, float *in1, float *out, int size, int core_mask)
void dp_addrelu_s(double *in0, double *in1, double *out, int size, int core_mask)
void c64_addrelu_s(float *in0, float *in1, float *out, int size, int core_mask)
void c128_addrelu_s(double *in0, double *in1, double *out, int size, int core_mask)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <addfusion.h>
 4
 5int main(int argc, char* argv[]) {
 6    float *in0 = (float *)0xA0000000;   //input在DDR空间
 7    float *in1 = (float *)0xB0000000;
 8    float *out = (float *)0xC0000000;
 9    int size = 1000;
10    int core_mask = 0xff;
11    fp_addrelu_s(in0, in1, out, size, core_mask);
12    return 0;
13}

私有存储版本:

void i8_addrelu_p(int8_t *in0, int8_t *in1, int8_t *out, int size)
void i16_addrelu_p(int16_t *in0, int16_t *in1, int16_t *out, int size)
void i32_addrelu_p(int32_t *in0, int32_t *in1, int32_t *out, int size)
void hp_addrelu_p(half *in0, half *in1, half *out, int size)
void fp_addrelu_p(float *in0, float *in1, float *out, int size)
void dp_addrelu_p(double *in0, double *in1, double *out, int size)
void c64_addrelu_p(float *in0, float *in1, float *out, int size)
void c128_addrelu_p(double *in0, double *in1, double *out, int size)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <addfusion.h>
 4
 5int main(int argc, char* argv[]) {
 6    float *in0 = (float *)0x10000000;   //input在L2空间
 7    float *in1 = (float *)0x10001000;
 8    float *out = (float *)0xC0000000;
 9    int size = 1000;
10    fp_addrelu_p(in0, in1, out, size);
11    return 0;
12}

逐元素求和并计算relu6激活函数

\[out_i = min(max(0, in0_i + in1_i), 6)\]
输入:
  • in0 - 第一个输入数据地址。

  • in1 - 第二个输入数据地址。

  • size - 计算长度。

  • core_mask - 核掩码(仅共享存储版本需要)。

输出:
  • out - 计算结果地址。

支持平台:

FT78NE MT7004

备注

  • FT78NE 支持int8, int16, int32, fp32, fp64, cplx64, cplx128

  • MT7004 支持fp16, fp32, int16, int32, cplx64

共享存储版本:

void i8_addrelu6_s(int8_t *in0, int8_t *in1, int8_t *out, int size, int core_mask)
void i16_addrelu6_s(int16_t *in0, int16_t *in1, int16_t *out, int size, int core_mask)
void i32_addrelu6_s(int32_t *in0, int32_t *in1, int32_t *out, int size, int core_mask)
void hp_addrelu6_s(half *in0, half *in1, half *out, int size, int core_mask)
void fp_addrelu6_s(float *in0, float *in1, float *out, int size, int core_mask)
void dp_addrelu6_s(double *in0, double *in1, double *out, int size, int core_mask)
void c64_addrelu6_s(float *in0, float *in1, float *out, int size, int core_mask)
void c128_addrelu6_s(double *in0, double *in1, double *out, int size, int core_mask)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <addfusion.h>
 4
 5int main(int argc, char* argv[]) {
 6    float *in0 = (float *)0xA0000000;   //input在DDR空间
 7    float *in1 = (float *)0xB0000000;
 8    float *out = (float *)0xC0000000;
 9    int size = 1000;
10    int core_mask = 0xff;
11    fp_addrelu6_s(in0, in1, out, size, core_mask);
12    return 0;
13}

私有存储版本:

void i8_addrelu6_p(int8_t *in0, int8_t *in1, int8_t *out, int size)
void i16_addrelu6_p(int16_t *in0, int16_t *in1, int16_t *out, int size)
void i32_addrelu6_p(int32_t *in0, int32_t *in1, int32_t *out, int size)
void hp_addrelu6_p(half *in0, half *in1, half *out, int size)
void fp_addrelu6_p(float *in0, float *in1, float *out, int size)
void dp_addrelu6_p(double *in0, double *in1, double *out, int size)
void c64_addrelu6_p(float *in0, float *in1, float *out, int size)
void c128_addrelu6_p(double *in0, double *in1, double *out, int size)

C调用示例:

 1//FT78NE示例
 2#include <stdio.h>
 3#include <addfusion.h>
 4
 5int main(int argc, char* argv[]) {
 6    float *in0 = (float *)0x10000000;   //input在L2空间
 7    float *in1 = (float *)0x10001000;
 8    float *out = (float *)0xC0000000;
 9    int size = 1000;
10    fp_addrelu6_p(in0, in1, out, size);
11    return 0;
12}